package com.txl.cn.spark02

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by txl on 2017/12/27.
  *
  * Spark driver that reads a text file of URLs (one per line), extracts a
  * `(subject, teacher)` pair from every line, counts how often each pair
  * occurs and prints the most frequent pairs to standard output.
  */
object Teacher {

  /** How many of the most frequent `(subject, teacher)` pairs are printed. */
  private val TopN = 2

  /**
    * Extracts the `(subject, teacher)` pair from a single input line.
    *
    * The line is split on `/`. The third field is treated as the host and the
    * fourth field as the teacher name; the subject is the part of the host
    * before its first `.`. For example `http://bigdata.edu360.cn/laozhang`
    * yields `("bigdata", "laozhang")`.
    *
    * @param line one raw line of the input file
    * @return `Some((subject, teacher))` when the line has at least four
    *         `/`-separated fields and the host contains a `.`; `None`
    *         otherwise, so a malformed line is skipped instead of aborting
    *         the whole job with an index error
    */
  private def parseLine(line: String): Option[(String, String)] = {
    val fields = line.split("/")
    if (fields.length < 4) {
      None
    } else {
      val host = fields(2)
      val firstDot = host.indexOf('.')
      // indexOf returns -1 when there is no dot; substring(0, -1) would throw.
      if (firstDot < 0) None
      else Some((host.substring(0, firstDot), fields(3)))
    }
  }

  /**
    * Entry point of the job.
    *
    * @param args command-line arguments; `args(0)` is the path of the input
    *             text file passed to `SparkContext.textFile`
    * @throws IllegalArgumentException if no input path is supplied
    */
  def main(args: Array[String]): Unit = {
    // Fail fast with a readable message before any Spark resources are created.
    require(args.nonEmpty, "Usage: Teacher <input-path>")
    val inputPath = args(0)

    val conf = new SparkConf().setAppName("Teacher").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      // Lines that cannot be parsed produce an empty iterator and are dropped.
      val pairs: RDD[(String, String)] =
        sc.textFile(inputPath).flatMap(line => parseLine(line).iterator)

      val counts: RDD[((String, String), Int)] =
        pairs.map((_, 1)).reduceByKey(_ + _)

      // Highest count first; only the first TopN results are brought to the driver.
      val top = counts.sortBy(_._2, ascending = false).take(TopN)
      top.foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }

}
